Uncomment and run the following 3 lines of code to install the package. You will be prompted to manually select a number to indicate whether to update dependencies and which ones.
#install.packages('devtools')
#library(devtools)
#devtools::install_github('https://github.com/weinbergerlab/InterventionEvaluatR') The goal for this analysis is to quantify changes in the rates of hospitalization due to pneumonia in different regions of Latin American countries
Let’s load the data, which are included with the package
ds<-read.csv('./prelogNEW_Mexico_state.csv')
ds<-ds[substr(ds$age_group,1,1) =='9' ,]
head(ds)
#> age_group date J12_18 A10_B99_nopneumo A17 A18 A19 A39 A41
#> 37633 9_1 2000-01-01 20 NA NA NA NA NA NA
#> 37634 9_1 2000-02-01 13 NA NA NA NA NA NA
#> 37635 9_1 2000-03-01 15 NA NA NA NA NA NA
#> 37636 9_1 2000-04-01 10 NA NA NA NA NA NA
#> 37637 9_1 2000-05-01 7 NA NA NA NA NA NA
#> 37638 9_1 2000-06-01 2 NA NA NA NA NA NA
#> B20_24 B34 B96 B97 B99 C00_D48 D50_89 E00_99 E10_14 E40_46 G00_99_SY
#> 37633 NA NA NA NA NA NA NA NA NA NA NA
#> 37634 NA NA NA NA NA NA NA NA NA NA NA
#> 37635 NA NA NA NA NA NA NA NA NA NA NA
#> 37636 NA NA NA NA NA NA NA NA NA NA NA
#> 37637 NA NA NA NA NA NA NA NA NA NA NA
#> 37638 NA NA NA NA NA NA NA NA NA NA NA
#> H00_99_SY I00_99 I60_64 cJ20_J22 K00_99 K35 K80 L00_99 M00_99 N00_99
#> 37633 NA NA NA 13 NA NA NA NA NA NA
#> 37634 NA NA NA 17 NA NA NA NA NA NA
#> 37635 NA NA NA 5 NA NA NA NA NA NA
#> 37636 NA NA NA 6 NA NA NA NA NA NA
#> 37637 NA NA NA 7 NA NA NA NA NA NA
#> 37638 NA NA NA 5 NA NA NA NA NA NA
#> N39 P00_99 P05_07 Q00_99 S00_T99 U00_99 V00_Y99 Z00_99 ach_noj
#> 37633 NA 90 26 12 NA NA NA NA 129
#> 37634 NA 80 18 5 NA NA NA NA 113
#> 37635 NA 98 16 4 NA NA NA NA 133
#> 37636 NA 106 26 12 NA NA NA NA 146
#> 37637 NA 87 18 6 NA NA NA NA 129
#> 37638 NA 85 27 4 NA NA NA NA 116
#ds$G00_99_SY <-NULL #Exclude meningitis as a control Ensure your date variable is in an R date format. If your variable is in a character or factor format, you need to tell R the format. – %m is a 2 digit month; %b is a month abbreviation (ie Jan, Feb) – %d is a 2 digit day (0-31), – %Y is a 4 digit year (e.g. 2011), %y is a 2 digit year (e.g. 11).
These codes are separated by a dash or slash or space. Modify the tryFormats script below if needed to match the format in your dataset
ds$date<-as.Date(ds$date, tryFormats=c('%Y-%m-%d',
'%m-%d-%Y',
'%m/%d/%Y',
'%Y/%m/%d',
'%d/%m/%Y'
) ) ds<-ds[order(ds$age_group, ds$date),] #Sort data by age group and monthHere we need to set a few parameters. We Use the evaluatr.init() function to specify the name of the dataset, the date at which the vaccine is introduced, the date at which we want to begin evaluating the vaccine (typically 1-2 year after vaccine introduction). We also provide some information on the dataset, sch as whether the data are monthly or quarterly (n_seasons), the variable names for the grouping variable, the date variable, the outcome variable, and the denominator variable (if any).
analysis <- evaluatr.init(
country = "Mexico_state", data = ds,
post_period_start = "2008-02-01", #First 'post-intervention' month is Jan 2012
eval_period_start = "2009-02-01", #We ignore first 12 month of data to allow for vaccine ramp up
eval_period_end = "2013-12-01", #The evaluation period lasts 2 years
n_seasons = 12, #This is monthly data, so select 12
year_def = "epi_year", # we are in southern hemisphere, so aggregate results by calendar year (Jan-Dec)
group_name = "age_group", #Strata categry name
date_name = "date", #Date variable name
outcome_name = "J12_18", #Outcome variable name
denom_name = "ach_noj" ,#Denominator variable name
set.burnN=10000,
set.sampleN=5000
)
set.seed(1)Save the results in object ‘impact_results’
n.iter<-ncol(impact_results$full$rr_iter)
thin=1
trace1<-impact_results$full$rr_iter[,seq(from=1, to=n.iter, by=thin)] #thin
for(i in 1: nrow(trace1)){
geweke.p<- pnorm(abs(geweke.diag(mcmc(trace1[i,]))$z),lower.tail=FALSE)*2
print(geweke.p)
if(geweke.p>0.05){con.stat<-'Model converged'
}else{
con.stat<-'Not converged'
}
plot(trace1[i,], type='l', main=paste0(con.stat,' ',colnames(trace1)[i]), bty='l', ylim=c(0.2,2))
}
#> var1
#> 0.362741#> var1
#> 0.2256035
#> var1
#> 0.532991
#> var1
#> 0.004133511
#> var1
#> 0.1015424
#> var1
#> 0.7423961
#> var1
#> 0.004454227
#> var1
#> 0.7486577
#> var1
#> 0.2652687
#> var1
#> 0.8571663
#> var1
#> 0.3705752
#> var1
#> 0.2969788
#> var1
#> 0.6601505
#> var1
#> 0.9622489
#> var1
#> 0.3925018
#> var1
#> 0.04765536
#> var1
#> 0.6839096
#> var1
#> 0.4085838
#> var1
#> 0.3999231
#> var1
#> 0.5864752
#> var1
#> 0.925462
#> var1
#> 0.9224339
#> var1
#> 0.8633828
#> var1
#> 0.5127307
#> var1
#> 0.9975958
#> var1
#> 0.02664334
#> var1
#> 0.182912
#> var1
#> 0.01926025
#> var1
#> 0.3963716
This shows the estimated rate ratio and 95% credible intervals from a synthetic controls analysis; a time-trend analysis where we used the specified denominator (all non-respiratory deaths) to adjust the number of pneumonia deaths in each month and a linear trend for time; a classic interrupted time series analysis (segmented regression); and the STL+PCA approach, which smooths and combines the control variables prior to including them in the model.
results.table<- cbind.data.frame(
#impact_results$best$rr_mean_intervals,
impact_results$full$rr_mean_intervals,
impact_results$time$rr_mean_intervals,
#impact_results$time_no_offset$rr_mean_intervals,
impact_results$its$rr_mean_intervals,
impact_results$pca$rr_mean_intervals)
table<-xtable(results.table)
htmlTable(table)| Synthetic controls Estimate (95% CI) | Time trend Estimate (95% CI) | Classic ITS (95% CI) | STL+PCA Estimate (95% CI) | |
|---|---|---|---|---|
| 9_1 | 0.87 (0.61, 1.15) | 0.58 (0.37, 0.89) | 0.63 (0.45, 0.89) | 0.62 (0.42, 0.87) |
| 9_10 | 0.91 (0.71, 1.16) | 0.82 (0.56, 1.21) | 1.05 (0.77, 1.46) | 1.18 (0.77, 1.87) |
| 9_11 | 1 (0.85, 1.2) | 1.24 (0.97, 1.58) | 1.2 (0.96, 1.49) | 1.03 (0.82, 1.26) |
| 9_12 | 0.81 (0.7, 0.99) | 0.84 (0.6, 1.17) | 0.89 (0.64, 1.22) | 0.76 (0.48, 1.15) |
| 9_13 | 0.84 (0.58, 1.22) | 0.79 (0.52, 1.19) | 0.82 (0.57, 1.17) | 0.75 (0.5, 1.12) |
| 9_14 | 1.22 (1.02, 1.47) | 0.95 (0.73, 1.25) | 0.89 (0.67, 1.19) | 0.93 (0.75, 1.14) |
| 9_15 | 0.91 (0.78, 1.04) | 1.09 (0.81, 1.5) | 1.11 (0.85, 1.44) | 0.65 (0.5, 0.83) |
| 9_16 | 0.84 (0.7, 1.04) | 0.8 (0.59, 1.06) | 0.79 (0.61, 1.04) | 0.63 (0.49, 0.82) |
| 9_17 | 0.76 (0.64, 0.93) | 1.07 (0.7, 1.56) | 1.11 (0.77, 1.59) | 0.6 (0.39, 0.85) |
| 9_19 | 0.65 (0.56, 0.83) | 1.06 (0.79, 1.4) | 0.9 (0.66, 1.23) | 0.57 (0.33, 0.96) |
| 9_2 | 0.93 (0.73, 1.15) | 1.04 (0.73, 1.45) | 1.01 (0.77, 1.33) | 0.92 (0.7, 1.2) |
| 9_20 | 1 (0.87, 1.15) | 1.1 (0.78, 1.59) | 1.08 (0.8, 1.46) | 0.83 (0.61, 1.12) |
| 9_21 | 0.93 (0.8, 1.05) | 0.89 (0.65, 1.2) | 0.79 (0.59, 1.07) | 0.73 (0.6, 0.89) |
| 9_22 | 0.85 (0.67, 1.17) | 1.22 (0.89, 1.71) | 1.27 (0.92, 1.72) | 1.3 (0.95, 1.82) |
| 9_23 | 1.1 (0.86, 1.36) | 0.96 (0.61, 1.44) | 0.94 (0.65, 1.35) | 1.01 (0.79, 1.29) |
| 9_24 | 0.74 (0.6, 0.89) | 0.77 (0.58, 1.01) | 0.73 (0.57, 0.94) | 0.68 (0.54, 0.86) |
| 9_25 | 0.82 (0.66, 1.06) | 1 (0.67, 1.47) | 1.02 (0.75, 1.37) | 0.76 (0.56, 1.05) |
| 9_26 | 0.66 (0.41, 0.78) | 0.63 (0.44, 0.88) | 0.85 (0.64, 1.13) | 1.05 (0.46, 2.53) |
| 9_27 | 1.13 (1, 1.28) | 1.07 (0.76, 1.51) | 0.75 (0.54, 1.04) | 0.94 (0.77, 1.12) |
| 9_28 | 0.79 (0.6, 1) | 0.92 (0.68, 1.23) | 0.91 (0.71, 1.17) | 0.97 (0.73, 1.28) |
| 9_29 | 0.97 (0.64, 1.38) | 0.99 (0.64, 1.51) | 1.09 (0.74, 1.59) | 0.75 (0.36, 1.6) |
| 9_30 | 0.85 (0.76, 0.97) | 0.96 (0.68, 1.34) | 0.82 (0.6, 1.11) | 0.82 (0.64, 1.03) |
| 9_31 | 1.07 (0.69, 1.38) | 1.24 (0.8, 1.84) | 1.27 (0.86, 1.85) | 0.97 (0.72, 1.3) |
| 9_32 | 0.82 (0.66, 0.99) | 0.61 (0.37, 1) | 0.78 (0.55, 1.11) | 0.78 (0.61, 0.98) |
| 9_4 | 0.96 (0.8, 1.14) | 1.19 (0.74, 1.87) | 1.26 (0.85, 1.87) | 0.95 (0.77, 1.16) |
| 9_5 | 0.76 (0.51, 1.06) | 0.65 (0.44, 0.92) | 0.69 (0.49, 0.96) | 0.56 (0.38, 0.85) |
| 9_7 | 0.96 (0.81, 1.1) | 1.09 (0.83, 1.42) | 1.2 (0.93, 1.55) | 0.85 (0.69, 1.02) |
| 9_8 | 1.14 (0.97, 1.32) | 1.32 (0.98, 1.8) | 0.98 (0.7, 1.37) | 0.97 (0.81, 1.14) |
| 9_9 | 1.1 (0.98, 1.22) | 1.08 (0.82, 1.43) | 1.07 (0.85, 1.35) | 1.08 (0.94, 1.23) |
How many cases were prevented from the time of vaccine introduction to the last time point in each stratum (+/- 95% CrI)? You can modify the number ‘last.point’ to pull out the cumulative number of cases at any point in the time series
last.point<-dim(impact_results$best$cumsum_prevented)[1]
cum.prevented<-impact_results$best$cumsum_prevented[last.point,,]The Synthetic controls analysis uses a Bayesian variable selection algorithm to weight the candidate covariates. In each MCMC iteration, it tests a different combination of variables. The model size indicates how many variables are selected in any given model. If <1 variable is selected on average, this indicates that no suitable control variables were identified. In this example 1-2 variables were selected on average in the 2-23m and 2-59 m age categories, while no controls were identified in the 24-59m age group (the average model size is <1 (0.44)).
model_size = data.frame(t(analysis$model_size))
htmlTable(model_size, align='c')| X9_1 | X9_10 | X9_11 | X9_12 | X9_13 | X9_14 | X9_15 | X9_16 | X9_17 | X9_19 | X9_2 | X9_20 | X9_21 | X9_22 | X9_23 | X9_24 | X9_25 | X9_26 | X9_27 | X9_28 | X9_29 | X9_30 | X9_31 | X9_32 | X9_4 | X9_5 | X9_7 | X9_8 | X9_9 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 2.13 | 3.18 | 1.88 | 1.51 | 2.89 | 3.61 | 4.19 | 2.34 | 1.7 | 2.11 | 1.73 | 1.4 | 1.54 | 1.38 | 0.69 | 2.05 | 2.24 | 1.69 | 2.15 | 2.21 | 2.51 | 1.49 | 0.95 | 1.54 | 1.7 | 2.58 | 1.93 | 2.18 | 3.32 |
Here we can see which variables received the most weight in the models for each strata. Values range from 0-1, with values closer to 1 indicating that the variable was included in a larger proportion of the variable combinations that were tested A value of 1 would indicate that the variable was always included in the model, a value of 0.48 would indicate the variable was included in 48% of the models that were tested.
htmlTable(incl_probs)| Group | Greatest Inclusion Variable | Greatest Inclusion Probability | Second Greatest Inclusion Variable | Second Greatest Inclusion Probability | Third Greatest Inclusion Variable | Third Greatest Inclusion Probability | |
|---|---|---|---|---|---|---|---|
| 1 | 9_1 | cJ20_J22 | 1 | P05_07 | 0.37 | ach_noj | 0.28 |
| 2 | 9_10 | cJ20_J22 | 0.99 | ach_noj | 0.89 | P00_99 | 0.88 |
| 3 | 9_11 | cJ20_J22 | 1 | P00_99 | 0.31 | ach_noj | 0.27 |
| 4 | 9_12 | cJ20_J22 | 1 | P05_07 | 0.22 | P00_99 | 0.11 |
| 5 | 9_13 | ach_noj | 0.89 | P05_07 | 0.74 | P00_99 | 0.71 |
| 6 | 9_14 | cJ20_J22 | 1 | ach_noj | 0.7 | E00_99 | 0.69 |
| 7 | 9_15 | cJ20_J22 | 1 | E00_99 | 0.86 | P05_07 | 0.55 |
| 8 | 9_16 | cJ20_J22 | 1 | P05_07 | 0.63 | P00_99 | 0.28 |
| 9 | 9_17 | cJ20_J22 | 1 | ach_noj | 0.22 | P05_07 | 0.16 |
| 10 | 9_19 | cJ20_J22 | 1 | P05_07 | 0.56 | P00_99 | 0.25 |
| 11 | 9_2 | Q00_99 | 0.8 | ach_noj | 0.37 | P00_99 | 0.34 |
| 12 | 9_20 | cJ20_J22 | 1 | P00_99 | 0.12 | ach_noj | 0.11 |
| 13 | 9_21 | cJ20_J22 | 1 | ach_noj | 0.16 | P00_99 | 0.16 |
| 14 | 9_22 | P00_99 | 0.54 | ach_noj | 0.49 | Q00_99 | 0.18 |
| 15 | 9_23 | P00_99 | 0.26 | ach_noj | 0.26 | P05_07 | 0.17 |
| 16 | 9_24 | cJ20_J22 | 1 | P05_07 | 0.31 | P00_99 | 0.26 |
| 17 | 9_25 | cJ20_J22 | 1 | ach_noj | 0.38 | Q00_99 | 0.35 |
| 18 | 9_26 | cJ20_J22 | 1 | P00_99 | 0.31 | ach_noj | 0.14 |
| 19 | 9_27 | cJ20_J22 | 1 | P05_07 | 0.84 | P00_99 | 0.13 |
| 20 | 9_28 | cJ20_J22 | 1 | P00_99 | 0.7 | ach_noj | 0.15 |
| 21 | 9_29 | cJ20_J22 | 1 | ach_noj | 0.8 | P00_99 | 0.29 |
| 22 | 9_30 | cJ20_J22 | 1 | K00_99 | 0.18 | P05_07 | 0.08 |
| 23 | 9_31 | ach_noj | 0.26 | P00_99 | 0.23 | Z00_99 | 0.18 |
| 24 | 9_32 | cJ20_J22 | 1 | Q00_99 | 0.2 | ach_noj | 0.18 |
| 25 | 9_4 | cJ20_J22 | 1 | P00_99 | 0.27 | ach_noj | 0.22 |
| 26 | 9_5 | cJ20_J22 | 1 | ach_noj | 0.8 | P00_99 | 0.44 |
| 27 | 9_7 | cJ20_J22 | 1 | P00_99 | 0.27 | ach_noj | 0.19 |
| 28 | 9_8 | cJ20_J22 | 1 | Q00_99 | 0.34 | K00_99 | 0.26 |
| 29 | 9_9 | cJ20_J22 | 1 | A10_B99_nopneumo | 0.85 | Z00_99 | 0.77 |
This first plot shows a monthly time series, with observed, fitted, and counterfacual values. The observed number of deaths is shown in the black line. The fitted values for the pre-vaccine period are shown in the red dotted line, and the counterfactual estimate with its 95% credible interval is shown as the white dotted line and gray shaded area. if the black line is below the gray shaded area, this would indicate that obsrved cases are lower than expected based on changes in the control diseases in the post-vaccine period. If the controls appropriately adjust for underlying trends, then this would reflect an effect of the vaccine.
plots$groups[[1]]$pred_full It is sometimes easier to look at the results if we aggregate the observed and expected values up to the annual time scale. Here the observed values are shown as black dots. When the black dots go below the gray shaded area, this indicates that the observed cases are lower than expected based on changes in the control diseases in the post-vaccine period. If the controls appropriately adjust for underlying trends, then this would reflect an effect of the vaccine. The vertical line indicates the date of vaccine introduction. The year when the vaccine is introduced is included to the right of the line, even if the vaccine was introduced part-way through the year. For instance, regardless of whether the the vaccine was introduced in January 2009 or November 2009, the observed dot for 2009 would be to the right of the line.
plots$groups[[1]]$pred_full_agg Finally, we can look at the cumulative cases prevented. In this example, there have been 445 cases prevented (95%CrI: 58, 931) from the time of vaccine introduction to the last day month of the study period. This is calculated by takin the difference between the observed and fitted number of cases in each month, and summing them. If atleast 1 control disease is identified from the synthetic controls model, then the result here is drawn from that model, otherwise, it is drawn from the STL+PCA model.
plots$groups[[1]]$cumsum_prevented We instead might want to just print everything for all age groups and models. We can use the following code to do that
for (group in names(plots$groups)) {
par(mfrow=c(4,1))
print(plots$groups[[group]]$pred_full_agg )
print(plots$groups[[group]]$pred_best_agg )
print(plots$groups[[group]]$pred_time_agg )
print(plots$groups[[group]]$pred_pca_agg )
}#> Warning: Removed 1 rows containing missing values (geom_point).
#> Warning: Removed 1 rows containing missing values (geom_point).
#> Warning: Removed 2 rows containing missing values (geom_point).
#> Warning: Removed 2 rows containing missing values (geom_point).
The package calcuates both an equal-tail 95% credible interval and a 95% highest posterior density interval (HPD or HDI interval). The latter might be more appropriate when the posterior distribution is skewed. The objects with HDI intervals are saved with an appendix of ‘HDI’ so –impact_results\(best\)pred_quantiles gives the estimate for equal tail intervals while –impact_results\(best\)pred_quantiles_HDI gives the estimate for the HDI interval –impact_results\(best\)cumsum_prevented_hdi gives the estimate of cumulative cases prevented using HPI –impact_results\(best\)ann_pred_HDI gives the annual predicted cases with HDI intervals –impact_results\(best\)rr_mean_HDI gives the overall rate ratio caclulated with HDI intervals – impact_results\(best\)log_rr_hdi gives the pointswise log-RR with HDI intervals Here we compare the version calculated with equal-tailed intervals with HPD intervals. In this example they give very similar coverage
print("Equal tailed intervals")
#> [1] "Equal tailed intervals"
round(impact_results$best$rr_mean[,c(2,1,3)],2)
#> Point Estimate Lower CI Upper CI
#> 9_1 0.87 0.61 1.15
#> 9_10 0.91 0.71 1.16
#> 9_11 1.00 0.85 1.20
#> 9_12 0.81 0.70 0.99
#> 9_13 0.84 0.58 1.22
#> 9_14 1.22 1.02 1.47
#> 9_15 0.91 0.78 1.04
#> 9_16 0.84 0.70 1.04
#> 9_17 0.76 0.64 0.93
#> 9_19 0.65 0.56 0.83
#> 9_2 0.93 0.73 1.15
#> 9_20 1.00 0.87 1.15
#> 9_21 0.93 0.80 1.05
#> 9_22 0.85 0.67 1.17
#> 9_23 1.01 0.79 1.29
#> 9_24 0.74 0.60 0.89
#> 9_25 0.82 0.66 1.06
#> 9_26 0.66 0.41 0.78
#> 9_27 1.13 1.00 1.28
#> 9_28 0.79 0.60 1.00
#> 9_29 0.97 0.64 1.38
#> 9_30 0.85 0.76 0.97
#> 9_31 0.97 0.72 1.30
#> 9_32 0.82 0.66 0.99
#> 9_4 0.96 0.80 1.14
#> 9_5 0.76 0.51 1.06
#> 9_7 0.96 0.81 1.10
#> 9_8 1.14 0.97 1.32
#> 9_9 1.10 0.98 1.22
print("HPD intervals")
#> [1] "HPD intervals"
round(impact_results$best$rr_mean_hdi,2)
#> Point Estimate lower upper
#> 9_1 0.87 0.61 1.14
#> 9_10 0.91 0.70 1.14
#> 9_11 1.00 0.85 1.19
#> 9_12 0.81 0.69 0.97
#> 9_13 0.84 0.55 1.18
#> 9_14 1.22 1.01 1.45
#> 9_15 0.91 0.77 1.03
#> 9_16 0.84 0.70 1.04
#> 9_17 0.76 0.62 0.92
#> 9_19 0.65 0.55 0.81
#> 9_2 0.93 0.73 1.15
#> 9_20 1.00 0.87 1.15
#> 9_21 0.93 0.81 1.06
#> 9_22 0.85 0.66 1.15
#> 9_23 1.01 0.79 1.28
#> 9_24 0.74 0.60 0.89
#> 9_25 0.82 0.64 1.04
#> 9_26 0.66 0.41 0.78
#> 9_27 1.13 0.99 1.26
#> 9_28 0.79 0.59 0.98
#> 9_29 0.97 0.64 1.38
#> 9_30 0.85 0.75 0.96
#> 9_31 0.97 0.70 1.27
#> 9_32 0.82 0.66 0.98
#> 9_4 0.96 0.80 1.14
#> 9_5 0.76 0.49 1.04
#> 9_7 0.96 0.81 1.09
#> 9_8 1.14 0.97 1.31
#> 9_9 1.10 0.99 1.23Compare pointwise coverage of the 95% CrI calculated as HDI or equal-tailed. In this example they are nearly identical. The red line denote the HPD intervals, the gray line denotes the equal-tailed
matplot(impact_results$best$pred_quantiles_HDI[,c(2,3),1], type='l', col='gray', lty=c(2,1,2), bty='l', ylim=c(0, max(impact_results$best$pred_quantiles_HDI[,,1])), ylab='Count')
matplot(impact_results$best$pred_quantiles[,c(1,3),1], type='l', col='red', lty=c(2,1,2), bty='l', ylim=c(0, max(impact_results$best$pred_quantiles_HDI[,,1])), ylab='Count', add=T)
points(analysis$input_data[, analysis$outcome_name])